% Find SELECT for changes to sample, specification, solver, etc

clc;
clear;
format long

% Import data file, SELECT sample, and label variables:
data=importdata('Estimation_data_all.txt');                 % N=2682: all completed main-draw matches with non-missing environmental conditions

% SELECT ability_metric 1=pre-match odds, 2.5=granular rank, or 4=z-scores
ability_metric=1;
% SELECT prize spread 1=ones, 2=cash prize spread, 3=points prize spread
prize_metric=2;
% SELECT finite difference for covariance
h=1e-02;


if ability_metric==1
    dvBet=data(:,10);                             % 1. pre-match odds are available for both players
    data_sample=data(dvBet==1,:);                 % N=2157: all among the 2682 matches for which pre-match odds are available (both players)
elseif ability_metric==2.5
    dvRank=data(:,15);                            % 2.5. (granular) WTA ranks are available for both players
    data_sample=data(dvRank==1,:);                % N=2670: all among the 2682 matches for which WTA ranks are available (both players)
elseif ability_metric==4
    dvPts=data(:,18);                             % 4. normalized ranking points (z-scores) are available for both players
    data_sample=data(dvPts==1,:);                 % N=2670: all among the 2682 matches for which WTA ranking points are available (both players)--this happens to be the same sample as in 2
end

clear data dvBet dvRank dvPts;

N=size(data_sample,1);

w1=data_sample(:,1);           % set 1 games won by match winner
l1=data_sample(:,2);           % set 1 games won by match loser
w2=data_sample(:,3);           % set 2 games won by match winner
l2=data_sample(:,4);           % set 2 games won by match loser
w3=data_sample(:,5);           % set 3 games won by match winner (if applicable)
l3=data_sample(:,6);           % set 3 games won by match loser (if applicable)
%wSets=data_sample(:,7);       % number of sets won by match winner
%lSets=data_sample(:,8);       % number of sets won by match loser
%tSets=data_sample(:,9);       % total number of sets
wWinProb=data_sample(:,11);    % (if pre-match odds are available) ex-ante winning probability of match winner
lWinProb=data_sample(:,12);    % (if pre-match odds are available) ex-ante winning probability of match loser
winProbDiff=data_sample(:,13); % abs(wWinProb-lWinProb)
wRank=data_sample(:,16);       % (if both players are ranked) WTA rank of match winner
lRank=data_sample(:,17);       % (if both players are ranked) WTA rank of match loser
%wPts=data_sample(:,19);       % (if both players have points) WTA ranking points of match winner
%lPts=data_sample(:,20);       % (if both players have points) WTA ranking points of match loser
wPtsN=data_sample(:,21);       % (if both players have points) WTA z-score of match winner
lPtsN=data_sample(:,22);       % (if both players have points) WTA z-score of match loser
%wAge=data_sample(:,24);       % (if age is available) age of match winner
%lAge=data_sample(:,25);       % (if age is available) age of match loser
%ageDiff=data_sample(:,26);    % (if age is available) abs(wAge-lAge)
prizeCashK=data_sample(:,27);  % win-lose cash prize spread of match (in 1000 USD)
prizePts=data_sample(:,28);    % win-lose WTA ranking points prize spread of match  
tpORtp3h=data_sample(:,29);    % temperature during match
Cpm25=data_sample(:,30);       % pollution during match
raining=data_sample(:,32);	   % (if record of rain is available) dummy for any rain during match
%dvAussieO=data_sample(:,33);  % Australian Open match
%dvChinaO=data_sample(:,34);   % China Open match
%dvWuhanO=data_sample(:,35);   % Wuhan Open match
%dvOtherC=data_sample(:,36);   % Other Chinese WTA series match (other than in Beijing and in Wuhan)
round=data_sample(:,37);       % round of WTA series

clear data_sample;
       

% We want to label players according to low (marginal) cost (of effort) "l" and high cost "h" (not winner "w" and loser "l")
% Cost will be a function of ability, so index match winner according to whether she was the low-cost (equivalently, more-able) player

if ability_metric==1
    LowCost_WinsMatch=(wWinProb>=lWinProb);                                                             % (1596 out of 2157 matches = 74%) 1. Ability proxy is pre-match winning probability (betting odds)
    LowCost_Ability =(.01*wWinProb).*LowCost_WinsMatch + (.01*lWinProb).*(ones(N,1)-LowCost_WinsMatch); % max(wWinProb,lWinProb) and min(wWinProb,lWinProb), respectively, would work just as well
    HighCost_Ability=(.01*lWinProb).*LowCost_WinsMatch + (.01*wWinProb).*(ones(N,1)-LowCost_WinsMatch); % (used only for observations where pre-match odds are available for both players)
elseif ability_metric==2.5
    LowCost_WinsMatch=(wRank<=lRank);                                                                   % (1871 out of 2670 matches = 70%) 2.5. (granular) Ability proxy is WTA rank
    LowCost_Ability =wRank.*LowCost_WinsMatch    + lRank.*(ones(N,1)-LowCost_WinsMatch);                % min(wRank,lRank) and max(wRank,lRank), respectively, would work just as well
    HighCost_Ability=lRank.*LowCost_WinsMatch    + wRank.*(ones(N,1)-LowCost_WinsMatch);                % (used only for observations where WTA ranks are available for both players)
elseif ability_metric==4
    LowCost_WinsMatch=(wPtsN>=lPtsN);                                                                   % (1870 out of 2670 matches = 70%) 4. Ability proxy is WTA z-score (normalized ranking points)
    LowCost_Ability =wPtsN.*LowCost_WinsMatch    + lPtsN.*(ones(N,1)-LowCost_WinsMatch);                % max(wPtsN,lPtsN) and min(wPtsN,lPtsN), respectively, would work just as well
    HighCost_Ability=lPtsN.*LowCost_WinsMatch    + wPtsN.*(ones(N,1)-LowCost_WinsMatch);                % (used only for observations where WTA ranking points are available for both players)   
end


% Prepare indicators for match outcomes
LowCost_Set1Games =w1.*(LowCost_WinsMatch==1)+l1.*(LowCost_WinsMatch==0);
HighCost_Set1Games=l1.*(LowCost_WinsMatch==1)+w1.*(LowCost_WinsMatch==0);
LowCost_Set2Games =w2.*(LowCost_WinsMatch==1)+l2.*(LowCost_WinsMatch==0);
HighCost_Set2Games=l2.*(LowCost_WinsMatch==1)+w2.*(LowCost_WinsMatch==0);
LowCost_Set3Games =w3.*(LowCost_WinsMatch==1)+l3.*(LowCost_WinsMatch==0);
HighCost_Set3Games=l3.*(LowCost_WinsMatch==1)+w3.*(LowCost_WinsMatch==0);
outcome_ll= ( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) );
outcome_lhl=( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) & (LowCost_Set3Games>HighCost_Set3Games) );
outcome_lhh=( (LowCost_Set1Games>HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) & (LowCost_Set3Games<HighCost_Set3Games) );
outcome_hll=( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) & (LowCost_Set3Games>HighCost_Set3Games) );
outcome_hlh=( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games>HighCost_Set2Games) & (LowCost_Set3Games<HighCost_Set3Games) );
outcome_hh= ( (LowCost_Set1Games<HighCost_Set1Games) & (LowCost_Set2Games<HighCost_Set2Games) );
outcome=[outcome_ll outcome_lhl outcome_lhh outcome_hll outcome_hlh outcome_hh];
clear LowCost_Set1Games HighCost_Set1Games LowCost_Set2Games HighCost_Set2Games LowCost_Set3Games HighCost_Set3Games;
clear outcome_ll outcome_lhl outcome_lhh outcome_hll outcome_hlh outcome_hh;


% w/l in original variable names is confusing, since l denotes loser, not low-cost player; thus cleanse the variables that will not be used further
clear wRank lRank wPts lPts wPtsN lPtsN l1 l2 l3 w1 w2 w3;
clear wAge lAge ageDiff;

if prize_metric==1
    V_l=ones(N,1);
    V_h=V_l;
elseif prize_metric==2
    V_l=prizeCashK;
    V_h=V_l;
elseif prize_metric==3
    V_l=prizePts;
    V_h=V_l;
end
clear prizeCash prizePts;

% Parameters that are fixed
cutoff_tp=27;
cutoff_pm=150/100;
delta_0=0;

% Parameters that are estimated: Initial values over which search takes place.
k_ini=1;
if ability_metric==1
    theta_ini=0;            % 1. Ability proxy is pre-match winning probability (betting odds)
elseif ability_metric==2.5
    theta_ini=0;            % 2.5. Ability proxy is WTA rank (granular)
elseif ability_metric==4
    theta_ini=0;            % 4. Ability proxy is WTA z-score (normalized ranking points)
end
delta_tp_ini=0; 
delta_pm_ini=0;

parameter_fixed=[ cutoff_tp;            % Temperature (tpORtp3h) cutoff
                  cutoff_pm;            % PM2.5 (Cpm25) cutoff
                  delta_0 ];            % Delta intercept
parameter_ini=[ k_ini;                  % technology parameter (randomness with which cost differences translate into winning probabilities)
                theta_ini;              % cost parameter
                delta_tp_ini;           % temperature disutility parameter (variation in sample is 0 = 25 C to 19 = 44 C)
                delta_pm_ini ];         % PM2.5 disutility parameter (variation in sample is 0 = 100 ug/m3 to 4.2 = 520 ug/m3)

clear k_ini theta_ini delta_tp_ini delta_pm_ini;
        
disp('The mean (negative of the) log likelihood contribution across matches at the initial parameter values is ')
likelihood_obj(parameter_ini,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed)


% Optimization
options = optimset('algorithm','interior-point','display','iter','maxiter',100000,'maxfunevals',100000,'TolX',1e-20,'TolFun',1e-20,'TolCon',1e-6,'Hessian','bfgs');
A=[0 0 1 1; 0 0 -1 -1]; b=[1; 1];
lb=[0; 0;   -Inf; -Inf];
ub=[1; Inf; Inf;  Inf];
tic
%[parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed),parameter_ini,A,b,[],[],lb,ub,[],[],options,[])
[parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed),parameter_ini,[],[],[],[],lb,ub,[],[],options,[])
%[parameter,fval,exitflag,output]=knitromatlab(@(parameter) likelihood_obj(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed),parameter_ini,[],[],[],[],[],[],[],[],options,[])
toc






% Covariance
prob_i=likelihood_contrib(parameter,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);  % prob_i (or f) = likelihood_contrib

f_1plush=likelihood_contrib(parameter+[h;0;0;0] ,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh=likelihood_contrib(parameter+[-h;0;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2plush=likelihood_contrib(parameter+[0;h;0;0] ,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2lessh=likelihood_contrib(parameter+[0;-h;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_3plush=likelihood_contrib(parameter+[0;0;h;0] ,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_3lessh=likelihood_contrib(parameter+[0;0;-h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_4plush=likelihood_contrib(parameter+[0;0;0;h] ,ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_4lessh=likelihood_contrib(parameter+[0;0;0;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);

% Compute d prob_i / d phi
D1=(f_1plush-f_1lessh)/(2*h);
D2=(f_2plush-f_2lessh)/(2*h);
D3=(f_3plush-f_3lessh)/(2*h);
D4=(f_4plush-f_4lessh)/(2*h);

% Compute d2 prob_i / d phi_j_k
D11=(f_1plush-2*prob_i+f_1lessh)/(h.^2);
D22=(f_2plush-2*prob_i+f_2lessh)/(h.^2);
D33=(f_3plush-2*prob_i+f_3lessh)/(h.^2);
D44=(f_4plush-2*prob_i+f_4lessh)/(h.^2);

f_1plush_2plush=likelihood_contrib(parameter+[h;h;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1plush_2lessh=likelihood_contrib(parameter+[h;-h;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_2plush=likelihood_contrib(parameter+[-h;h;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_2lessh=likelihood_contrib(parameter+[-h;-h;0;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1plush_3plush=likelihood_contrib(parameter+[h;0;h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1plush_3lessh=likelihood_contrib(parameter+[h;0;-h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_3plush=likelihood_contrib(parameter+[-h;0;h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_3lessh=likelihood_contrib(parameter+[-h;0;-h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1plush_4plush=likelihood_contrib(parameter+[h;0;0;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1plush_4lessh=likelihood_contrib(parameter+[h;0;0;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_4plush=likelihood_contrib(parameter+[-h;0;0;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_1lessh_4lessh=likelihood_contrib(parameter+[-h;0;0;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
D12=((f_1plush_2plush-f_1lessh_2plush)/(2*h)-(f_1plush_2lessh-f_1lessh_2lessh)/(2*h))/(2*h);
D13=((f_1plush_3plush-f_1lessh_3plush)/(2*h)-(f_1plush_3lessh-f_1lessh_3lessh)/(2*h))/(2*h);
D14=((f_1plush_4plush-f_1lessh_4plush)/(2*h)-(f_1plush_4lessh-f_1lessh_4lessh)/(2*h))/(2*h);

f_2plush_3plush=likelihood_contrib(parameter+[0;h;h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2plush_3lessh=likelihood_contrib(parameter+[0;h;-h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2lessh_3plush=likelihood_contrib(parameter+[0;-h;h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2lessh_3lessh=likelihood_contrib(parameter+[0;-h;-h;0],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2plush_4plush=likelihood_contrib(parameter+[0;h;0;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2plush_4lessh=likelihood_contrib(parameter+[0;h;0;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2lessh_4plush=likelihood_contrib(parameter+[0;-h;0;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_2lessh_4lessh=likelihood_contrib(parameter+[0;-h;0;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
D21=((f_1plush_2plush-f_1plush_2lessh)/(2*h)-(f_1lessh_2plush-f_1lessh_2lessh)/(2*h))/(2*h);
D23=((f_2plush_3plush-f_2lessh_3plush)/(2*h)-(f_2plush_3lessh-f_2lessh_3lessh)/(2*h))/(2*h);
D24=((f_2plush_4plush-f_2lessh_4plush)/(2*h)-(f_2plush_4lessh-f_2lessh_4lessh)/(2*h))/(2*h);

f_3plush_4plush=likelihood_contrib(parameter+[0;0;h;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_3plush_4lessh=likelihood_contrib(parameter+[0;0;h;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_3lessh_4plush=likelihood_contrib(parameter+[0;0;-h;h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
f_3lessh_4lessh=likelihood_contrib(parameter+[0;0;-h;-h],ability_metric,LowCost_Ability,HighCost_Ability,V_l,V_h,outcome,tpORtp3h,Cpm25,parameter_fixed);
D31=((f_1plush_3plush-f_1plush_3lessh)/(2*h)-(f_1lessh_3plush-f_1lessh_3lessh)/(2*h))/(2*h);
D32=((f_2plush_3plush-f_2plush_3lessh)/(2*h)-(f_2lessh_3plush-f_2lessh_3lessh)/(2*h))/(2*h);
D34=((f_3plush_4plush-f_3lessh_4plush)/(2*h)-(f_3plush_4lessh-f_3lessh_4lessh)/(2*h))/(2*h);

D41=((f_1plush_4plush-f_1plush_4lessh)/(2*h)-(f_1lessh_4plush-f_1lessh_4lessh)/(2*h))/(2*h);
D42=((f_2plush_4plush-f_2plush_4lessh)/(2*h)-(f_2lessh_4plush-f_2lessh_4lessh)/(2*h))/(2*h);
D43=((f_3plush_4plush-f_3plush_4lessh)/(2*h)-(f_3lessh_4plush-f_3lessh_4lessh)/(2*h))/(2*h);

H=zeros(4,4,N);

H(1,1,:) = D11./prob_i - D1.*D1./prob_i./prob_i;
H(1,2,:) = D12./prob_i - D1.*D2./prob_i./prob_i;
H(1,3,:) = D13./prob_i - D1.*D3./prob_i./prob_i;
H(1,4,:) = D14./prob_i - D1.*D4./prob_i./prob_i;

H(2,1,:) = D21./prob_i - D2.*D1./prob_i./prob_i;
H(2,2,:) = D22./prob_i - D2.*D2./prob_i./prob_i;
H(2,3,:) = D23./prob_i - D2.*D3./prob_i./prob_i;
H(2,4,:) = D24./prob_i - D2.*D4./prob_i./prob_i;

H(3,1,:) = D31./prob_i - D3.*D1./prob_i./prob_i;
H(3,2,:) = D32./prob_i - D3.*D2./prob_i./prob_i;
H(3,3,:) = D33./prob_i - D3.*D3./prob_i./prob_i;
H(3,4,:) = D34./prob_i - D3.*D4./prob_i./prob_i;

H(4,1,:) = D41./prob_i - D4.*D1./prob_i./prob_i;
H(4,2,:) = D42./prob_i - D4.*D2./prob_i./prob_i;
H(4,3,:) = D43./prob_i - D4.*D3./prob_i./prob_i;
H(4,4,:) = D44./prob_i - D4.*D4./prob_i./prob_i;

Hess=-mean(H,3);
cov_hess=inv(Hess)/N;
se_hess=diag(cov_hess).^(1/2);
tstat_hess=parameter./se_hess;
format shortG
disp('estimate      std dev (Hessian estimate)     t-stat (Hessian estimate)');
[parameter se_hess tstat_hess]

